package com.ml.room.terrace.process;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.http.HtmlUtil;
import com.alibaba.fastjson.JSON;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.ml.room.dao.IBuildingProjectDao;
import com.ml.room.dao.IBuildingProjectItemDao;
import com.ml.room.dao.IBuildingRecordDao;
import com.ml.room.repository.entity.BuildingProject;
import com.ml.room.repository.entity.BuildingRecord;
import com.ml.room.service.IBuildingRecordService;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * WebMagic {@link PageProcessor} that extracts a building project from a downloaded page
 * and hands it to the downstream pipeline.
 *
 * <p>[Yunfang platform] - building project - data extraction.
 *
 * <p>NOTE(review): this class is a Spring {@code @Component} and keeps {@link #recordId} as
 * mutable instance state. If one instance is shared by several concurrently running spiders
 * the value may be overwritten between pages - confirm against the callers.
 *
 * @ClassName: BuildingProjectPageProcess
 * @Description: [Yunfang platform] - building project - data extraction
 * @Author: IDai
 * @Date: 2021-08-30 16:49 Monday
 **/
@Component
public class BuildingProjectPageProcess implements PageProcessor {

    @Autowired
    private IBuildingProjectDao buildingProjectDao;

    // Primary-key id of the filing record; copied onto every extracted project and
    // published to the pipeline under the "recordId" field.
    // Kept public because external code may access the field directly.
    public String recordId;

    /**
     * @return the filing-record id currently associated with this processor
     */
    public String getRecordId() {
        return recordId;
    }

    /**
     * @param recordId the filing-record id to attach to projects extracted from now on
     */
    public void setRecordId(String recordId) {
        this.recordId = recordId;
    }

    /** Logger for this class. **/
    private static final Logger logger = LoggerFactory.getLogger(BuildingProjectPageProcess.class);

    // Not referenced inside this class; presumably used by callers to build page URLs - verify.
    public static final String PREFIX_URL = "http://220.178.124.94:8010/fangjia/ws/";

    // Crawl settings: retry count 3, sleep time 1000 and time-out 10000 (see WebMagic Site docs for units).
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000);
    //.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");

    /**
     * Extracts the project name and, when the {@code #IsTableShow} element is present, the
     * detail fields from its rows. The project is published under the {@code "buildingProject"}
     * field only when the DAO lookup by name returns 0 (treated as "no such project stored yet");
     * {@code "recordId"} is always published.
     *
     * @param page the downloaded page to extract from
     */
    @Override
    public void process(Page page) {
        // Guarded so the whole page is not serialised to JSON when INFO logging is off.
        if (logger.isInfoEnabled()) {
            logger.info("【合肥新房】 - 房建项目 - 数据抽取，入参page：{}", JSON.toJSONString(page));
        }

        BuildingProject buildingProject = new BuildingProject();
        String projectName = page.getHtml().css("table#top tr span#txtTitle font", "text").toString();
        String projectBody = page.getHtml().css("#IsTableShow").toString();

        int existingCount = buildingProjectDao.isHaveThisBuildingProject(projectName);
        if (existingCount == 0) {
            // No project with this name reported by the DAO: build a new one.
            buildingProject.setRecordId(recordId);
            buildingProject.setProjectName(projectName);

            // Only read the detail rows when the detail element exists on the page.
            if (StringUtils.isNotBlank(projectBody)) {
                List<Selectable> detailRows = page.getHtml().css("table#IsTableShow tr").nodes();
                if (CollectionUtil.isNotEmpty(detailRows)) {
                    fillDetails(buildingProject, detailRows);
                }
            }

            // Store the data the downstream pipeline has to persist.
            page.putField("buildingProject", buildingProject);
        }
        page.putField("recordId", recordId);

        if (logger.isInfoEnabled()) {
            logger.info("【合肥新房】 - 房建项目 - 数据抽取，出参buildingProject：{}", JSON.toJSONString(buildingProject));
        }
    }

    /**
     * Copies the detail fields from the table rows onto the project. Rows that are missing
     * (table shorter than expected) leave the corresponding fields {@code null} instead of
     * failing the whole page with an {@link IndexOutOfBoundsException}.
     */
    private void fillDetails(BuildingProject buildingProject, List<Selectable> detailRows) {
        buildingProject.setLocationArea(cellText(detailRows, 0, "span#txtLpLocation"));
        buildingProject.setSeatingPosition(cellText(detailRows, 0, "span#txtLpArea"));
        buildingProject.setPropertyClass(cellText(detailRows, 1, "span#txtLpwyType"));
        buildingProject.setBuildingTypes(cellText(detailRows, 1, "span#txtLpBuildType"));
        buildingProject.setDevelopEnterprise(cellText(detailRows, 2, "span#txtLpkfEnterprise"));
        buildingProject.setDesignUnit(cellText(detailRows, 2, "span#txtLpsjEnterprise"));
        buildingProject.setPropertyCompany(cellText(detailRows, 3, "span#txtLpwyEnterprise"));
        buildingProject.setCircumMating(cellText(detailRows, 3, "span#txtLpRim"));

        // The project description is taken as raw HTML of the row's div, then stripped of
        // tags and newlines.
        Selectable infoRow = rowAt(detailRows, 4);
        String projectInfo = infoRow == null ? null : infoRow.css("div").toString();
        if (StringUtils.isNotBlank(projectInfo)) {
            buildingProject.setProjectInfo(HtmlUtil.cleanHtmlTag(projectInfo).replace("\n", ""));
        }

        buildingProject.setTrafficCondition(cellText(detailRows, 5, "span#txtLpTraffic"));
    }

    /** Returns the row at {@code index}, or {@code null} when the table has no such row. */
    private static Selectable rowAt(List<Selectable> rows, int index) {
        return index < rows.size() ? rows.get(index) : null;
    }

    /** Returns the text selected by {@code selector} in row {@code index}, or {@code null} when the row is missing. */
    private static String cellText(List<Selectable> rows, int index, String selector) {
        Selectable row = rowAt(rows, index);
        return row == null ? null : row.css(selector, "text").toString();
    }

    /**
     * @return the crawl settings used for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

}
